
use Data/daily_survey_web, clear

* construct treatment vars over full sample period
* endexperiment_date is a constant on every row: the day after the largest
* read_date found in the loaded data.
egen endexperiment_date = max(read_date + 1)
format endexperiment_date %d
sort account_number read_date
***********
***ORIGINAL TREATMENT VARS 
**********
***********
* CONSTRUCT new treatment vars
**********
* Running (cumulative) email counts within each account, accumulated in
* read_date order. The within-account order is stated in the by-prefix so the
* running sums do not rely on whatever sort order earlier commands left behind.
bysort account_number (read_date): gen CumEmailOpen_monthly = sum(NEmailOpen)
by account_number (read_date): gen CumEmailSent_monthly = sum(NEmailSent)
by account_number (read_date): gen CumClicked_monthly = sum(NClicked)

* time varying proportion of emails opened
* (missing whenever NEmailSent is 0 or missing)
gen PEmailOpen_monthly = NEmailOpen/NEmailSent
* Full variable name rather than the abbreviation PEmailOpen, so the label
* cannot land on another variable and does not depend on varabbrev being on.
label variable PEmailOpen_monthly "Proportion of emails opened"

* time varying cumulative proportion opened
* Uses the two cumulative counts created above, referenced by their full names
* (missing while the cumulative sent count is still 0).
gen CumPEmailOpen_monthly = CumEmailOpen_monthly/CumEmailSent_monthly



* Show the event-date variables with a daily date display format.
format opened_date received_date read_date activated_date clicked_date accessed_date loggedin_date %d


* old definitions of treatment
* Each indicator is 1 when the comparison holds and 0 otherwise. Stata orders
* missing above every number, so a missing event date with a non-missing
* read_date gives 0.
gen treatment = (read_date >= received_date)
label variable treatment "meter read is post received date"

gen treated = (read_date >= accessed_date)
label variable treated "meter read is post accessed date"

* TODO: recode cases where accessed_date < received_date (no recode is applied here)

* Drop the raw campaign-email flag variables.
drop openedcampaignemail receivedcampaignemail clickedcampaignemail firstopenedcampaignemail

* Declare the panel structure: account_number is the panel id, read_date the time variable.
sort account_number read_date
xtset account_number read_date
order account_number read_date firstobserved_date lastobserved_date opened_date received_date visited_date clicked_date activated_date accessed_date quantiles_prior 
* Full variable names instead of the abbreviations firstobserved / lastobserved,
* so the command does not depend on varabbrev or on no other variable sharing
* those prefixes.
format firstobserved_date lastobserved_date %d
* Remove observations flagged by large_user2, then every variable whose name
* starts with "large".
drop if large_user2==1
drop large*
* Keep only reads dated 01jul2012 or later (td(01jul2012) is day number 19175).
drop if read_date < td(01jul2012)


* drop early pilot households
* The pilot windows are defined on opened_date. Each td() literal equals the
* day number used originally (shown in parentheses). Upper bounds are exclusive.
* pilot1: 22aug2012 (19227) to 14oct2012; original note: 75 houses
gen pilot1 = (opened_date >= td(22aug2012)) & (opened_date < td(15oct2012))
* pilot2_old: from 15oct2012 (19281); original note: Oct 15 - Oct 26, 139 houses
* NOTE(review): the upper bound 19304 is 07nov2012, not Oct 26 - confirm which
* end date was intended. The drops below are unaffected: they only remove
* opened_date in the window starting 15oct2012 and ending before 17oct2012.
gen pilot2_old = (opened_date >= td(15oct2012)) & (opened_date < td(07nov2012))
* pilot2: same window but starting 17oct2012 (19283)
gen pilot2 = (opened_date >= td(17oct2012)) & (opened_date < td(07nov2012))
* The indicators are always 0 or 1 (never missing), so they can be used
* directly as conditions.
drop if pilot1
drop if pilot2_old & !pilot2

* Build a numeric distributor code from nmi and attach distributor names to it.
* NOTE(review): presumably the leading three digits of nmi identify the
* distributor - confirm against the nmi format.
gen t = nmi/100000
* Convert the scaled number to text so its leading characters can be extracted.
gen distributor = string(t)
* Keep only the first three characters of that text.
replace distributor = substr(distributor,1,3)
* Convert the three-character string back to a numeric variable in place.
destring distributor, replace
* Value labels for the codes 600-640.
label define distributorvals 600 "Jemena" 610 "Citipower" 620 "Powercor" 630 "SPAusNet" 640 "United"
label values distributor distributorvals
* t was only a scratch variable for the conversion.
drop t 

*** errors - people receiving emails before smart meters
* Drops observations whose first observed date is later than the email
* received date. The full variable name replaces the abbreviation firstob, so
* the command does not depend on varabbrev or on the prefix staying unique.
* NOTE(review): a missing firstobserved_date with a non-missing received_date
* also satisfies this condition (missing sorts above all numbers) - confirm
* that dropping those rows is intended.
drop if firstobserved_date > received_date 
** approx 30 accounts

* Snapshot taken before the activity restriction below.
save Data/intervalwholiday.dta, replace

** restrict to active households
* NOTE(review): rows where daily is missing are NOT dropped here, because
* missing is not less than 4 - confirm that is intended.
drop if daily< 4



save Data/Interval_sample.dta, replace
